library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(ggfortify)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
#loading in data
kinetics_noise <- read.delim("ln_nas_linked_kinetics_noise.bed")
kinetics_noise <- kinetics_noise %>% filter(Next > 0)
#head(kinetics_noise)
ggplot(kinetics_noise) +
geom_point(aes(x=bs, y=Nint)) +
scale_y_log10() +
scale_x_log10() +
geom_smooth(aes(x=bs, y=Nint)) +
theme_classic(base_size = 18) +
ylab("Intrinsic Noise") +
xlab("Burst Frequency")
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
ggplot(kinetics_noise) +
geom_point(aes(x=k_on, y=Nint)) +
scale_y_log10() +
scale_x_log10() +
geom_smooth(aes(x=k_on, y=Nint))+
theme_classic(base_size = 18) +
ylab("Intrinsic Noise") +
xlab("Burst Frequency")
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
ggplot(kinetics_noise) +
geom_point(aes(x=bs, y=Next)) +
scale_y_log10() +
scale_x_log10() +
geom_smooth(aes(x=bs, y=Next))+
theme_classic(base_size = 18) +
ylab("Extrinsic Noise") +
xlab("Burst Frequency")
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
ggplot(kinetics_noise) +
geom_point(aes(x=bs, y=Ntot)) +
scale_y_log10() +
scale_x_log10() +
geom_smooth(aes(x=bs, y=Ntot))+
theme_classic(base_size = 18) +
ylab("Total Noise") +
xlab("Burst Frequency")
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
mid<-mean(log10(kinetics_noise$Nint))
ggplot(kinetics_noise) +
geom_point(aes(x=bs, y=k_on, colour = log10(Nint))) +
scale_x_log10() +
scale_y_log10() +
theme_minimal(base_size = 18) +
scale_color_gradient2(midpoint=mid, low="blue", mid="grey",
high="red", space ="Lab" ) +
ylab("Burst Frequency") +
xlab("Burst Size")
mid2<-mean(log10(kinetics_noise$Next))
ggplot(kinetics_noise) +
geom_point(aes(x=bs, y=k_on, colour = log10(Next))) +
scale_x_log10() +
scale_y_log10() +
theme_minimal(base_size = 18) +
scale_color_gradient2(midpoint=mid2, low="blue", mid="grey",
high="red", space ="Lab" ) +
ylab("Burst Frequency") +
xlab("Burst Size")
mid3<-mean(log10(kinetics_noise$Ntot))
ggplot(kinetics_noise) +
geom_point(aes(x=bs, y=k_on, colour = log10(Ntot))) +
scale_x_log10() +
scale_y_log10() +
theme_minimal(base_size = 18) +
scale_color_gradient2(midpoint=mid, low="blue", mid="grey",
high="red", space ="Lab" ) +
ylab("Burst Frequency") +
xlab("Burst Size")
library(ggpubr)
t.test(log10(kinetics_noise$Ntot[which(as.logical(kinetics_noise$anchor))]),log10(kinetics_noise$Ntot[which(!as.logical(kinetics_noise$anchor))]))
##
## Welch Two Sample t-test
##
## data: log10(kinetics_noise$Ntot[which(as.logical(kinetics_noise$anchor))]) and log10(kinetics_noise$Ntot[which(!as.logical(kinetics_noise$anchor))])
## t = -2.3082, df = 319.91, p-value = 0.02163
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.081785149 -0.006518294
## sample estimates:
## mean of x mean of y
## -0.08691602 -0.04276430
ggplot(kinetics_noise, aes(x=factor(anchor), Ntot, fill = anchor)) +
geom_violin() +
geom_boxplot(width=0.1) +
theme_minimal(base_size = 12) +
ylab( "Total Transcriptional Noise") +
scale_y_log10() +
xlab("") +
scale_x_discrete(labels = NULL) +
scale_fill_discrete(name = "Enhancer-Promoter \n Contact") +
stat_compare_means(method = "t.test")
t.test(log10(kinetics_noise$Next[which(as.logical(kinetics_noise$anchor))]),log10(kinetics_noise$Next[which(!as.logical(kinetics_noise$anchor))]))
##
## Welch Two Sample t-test
##
## data: log10(kinetics_noise$Next[which(as.logical(kinetics_noise$anchor))]) and log10(kinetics_noise$Next[which(!as.logical(kinetics_noise$anchor))])
## t = 0.49126, df = 328.63, p-value = 0.6236
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.04464586 0.07436634
## sample estimates:
## mean of x mean of y
## -0.7837668 -0.7986271
ggplot(kinetics_noise, aes(x=factor(anchor), Next, fill = anchor)) +
geom_violin() +
geom_boxplot(width=0.1) +
theme_minimal(base_size = 12) +
ylab( "Extrinsic Transcriptional Noise") +
scale_y_log10() +
xlab("") +
scale_x_discrete(labels = NULL) +
scale_fill_discrete(name = "Enhancer-Promoter \n Contact") +
stat_compare_means(method = "t.test")
t.test(log10(kinetics_noise$Nint[which(as.logical(kinetics_noise$anchor))]),log10(kinetics_noise$Nint[which(!as.logical(kinetics_noise$anchor))]))
##
## Welch Two Sample t-test
##
## data: log10(kinetics_noise$Nint[which(as.logical(kinetics_noise$anchor))]) and log10(kinetics_noise$Nint[which(!as.logical(kinetics_noise$anchor))])
## t = -2.7027, df = 307.42, p-value = 0.00726
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.11174353 -0.01758512
## sample estimates:
## mean of x mean of y
## -0.2581415 -0.1934771
ggplot(kinetics_noise, aes(x=factor(anchor), Nint, fill = anchor)) +
geom_violin() +
geom_boxplot(width=0.1) +
theme_minimal(base_size = 12) +
ylab( "Intrinsic Transcriptional Noise") +
scale_y_log10() +
xlab("") +
scale_x_discrete(labels = NULL) +
scale_fill_discrete(name = "Enhancer-Promoter \n Contact") +
stat_compare_means(method = "t.test")
ggplot(kinetics_noise, aes(factor(string.Freq), Next)) +
geom_violin(scale = "width", linewidth = 1) +
geom_dotplot(binaxis = "y", binwidth = 0.015, mapping = aes(alpha = factor(string.Freq)), color = "blue") +
scale_alpha_manual(values = c(0, seq(from = 0.2, to = 1 , by = 0.9/20))) +
scale_y_log10() +
geom_smooth(data = kinetics_noise, mapping= aes(Freq, Next), color = "red") +
coord_cartesian(xlim = c(0,11)) +
theme_minimal(base_size = 18) +
theme(legend.position = "none") +
ylab("Extrinsic Noise") +
xlab("Number of Anchored Enhancers")
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
ggplot(kinetics_noise, aes(factor(string.Freq), Ntot)) +
geom_violin(scale = "width", linewidth = 1) +
geom_dotplot(binaxis = "y", binwidth = 0.015, mapping = aes(alpha = factor(string.Freq)), color = "blue") +
scale_alpha_manual(values = c(0, seq(from = 0.2, to = 1 , by = 0.9/20))) +
scale_y_log10() +
geom_smooth(data = kinetics_noise, mapping= aes(Freq, Ntot), color = "red") +
coord_cartesian(xlim = c(0,11)) +
theme_minimal(base_size = 18) +
theme(legend.position = "none") +
ylab("Total Noise") +
xlab("Number of Anchored Enhancers")
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
nint_model <- lm(log10(Nint) ~ Freq * (log10(k_on) + log10(bs)), kinetics_noise)
nint_model_logi <- lm(log10(Nint) ~ anchor + log10(k_on) + log10(bs), kinetics_noise)
resettest(nint_model)
##
## RESET test
##
## data: nint_model
## RESET = 79.073, df1 = 2, df2 = 2662, p-value < 2.2e-16
autoplot(nint_model)
vif(nint_model)
## there are higher-order terms (interactions) in this model
## consider setting type = 'predictor'; see ?vif
## Freq log10(k_on) log10(bs) Freq:log10(k_on)
## 7.265444 1.288812 1.282231 1.888372
## Freq:log10(bs)
## 6.217500
summary(nint_model)
##
## Call:
## lm(formula = log10(Nint) ~ Freq * (log10(k_on) + log10(bs)),
## data = kinetics_noise)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.94236 -0.06866 -0.00269 0.06030 1.40088
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.272525 0.008526 31.966 <2e-16 ***
## Freq -0.003946 0.007457 -0.529 0.597
## log10(k_on) -0.934596 0.009026 -103.539 <2e-16 ***
## log10(bs) -0.448763 0.010630 -42.218 <2e-16 ***
## Freq:log10(k_on) -0.017707 0.008952 -1.978 0.048 *
## Freq:log10(bs) 0.015883 0.008136 1.952 0.051 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1537 on 2664 degrees of freedom
## Multiple R-squared: 0.8117, Adjusted R-squared: 0.8113
## F-statistic: 2296 on 5 and 2664 DF, p-value: < 2.2e-16
next_model <- lm(log10(Next) ~ (log10(k_on)) + log10(bs) + Freq, kinetics_noise)
vif(next_model)
## log10(k_on) log10(bs) Freq
## 1.218016 1.216608 1.008407
resettest(next_model)
##
## RESET test
##
## data: next_model
## RESET = 103.07, df1 = 2, df2 = 2664, p-value < 2.2e-16
autoplot(next_model)
summary(next_model)
##
## Call:
## lm(formula = log10(Next) ~ (log10(k_on)) + log10(bs) + Freq,
## data = kinetics_noise)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.75381 -0.18354 0.08278 0.27596 1.48119
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.074698 0.024376 -44.089 <2e-16 ***
## log10(k_on) -0.325383 0.025771 -12.626 <2e-16 ***
## log10(bs) 0.497552 0.030408 16.362 <2e-16 ***
## Freq 0.010101 0.008159 1.238 0.216
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4514 on 2666 degrees of freedom
## Multiple R-squared: 0.2152, Adjusted R-squared: 0.2143
## F-statistic: 243.7 on 3 and 2666 DF, p-value: < 2.2e-16
library(lmtest)
ntot_model <- lm(log10(Ntot) ~ log10(k_on) + log10(bs) + Freq , kinetics_noise)
vif(ntot_model)
## log10(k_on) log10(bs) Freq
## 1.218016 1.216608 1.008407
resettest(ntot_model)
##
## RESET test
##
## data: ntot_model
## RESET = 2.5256, df1 = 2, df2 = 2664, p-value = 0.0802
autoplot(ntot_model)
summary(ntot_model)
##
## Call:
## lm(formula = log10(Ntot) ~ log10(k_on) + log10(bs) + Freq, data = kinetics_noise)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.79064 -0.06640 -0.01308 0.04525 1.09968
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.208523 0.006254 33.341 <2e-16 ***
## log10(k_on) -0.807971 0.006612 -122.194 <2e-16 ***
## log10(bs) -0.161937 0.007802 -20.756 <2e-16 ***
## Freq 0.005389 0.002093 2.574 0.0101 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1158 on 2666 degrees of freedom
## Multiple R-squared: 0.8579, Adjusted R-squared: 0.8578
## F-statistic: 5367 on 3 and 2666 DF, p-value: < 2.2e-16
Finding interactions in higher order modeling
ntot_model <- lm(log10(Ntot) ~ (log10(k_on) + log10(bs)) * Freq, kinetics_noise)
vif(ntot_model, type = 'predictor')
## VIFs computed for predictors
## [1] 7.265444 7.265444 7.265444
resettest(ntot_model)
##
## RESET test
##
## data: ntot_model
## RESET = 2.4585, df1 = 2, df2 = 2662, p-value = 0.08575
autoplot(ntot_model)
summary(ntot_model)
##
## Call:
## lm(formula = log10(Ntot) ~ (log10(k_on) + log10(bs)) * Freq,
## data = kinetics_noise)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.79479 -0.06622 -0.01321 0.04500 1.08950
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.2122184 0.0064166 33.074 <2e-16 ***
## log10(k_on) -0.8091333 0.0067936 -119.103 <2e-16 ***
## log10(bs) -0.1668531 0.0080002 -20.856 <2e-16 ***
## Freq -0.0081820 0.0056126 -1.458 0.145
## log10(k_on):Freq 0.0001049 0.0067376 0.016 0.988
## log10(bs):Freq 0.0176400 0.0061233 2.881 0.004 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1157 on 2664 degrees of freedom
## Multiple R-squared: 0.8584, Adjusted R-squared: 0.8581
## F-statistic: 3230 on 5 and 2664 DF, p-value: < 2.2e-16
library(interactions)
library(colorspace)
q10 <- qualitative_hcl(10, "Dark2")
interact_plot(ntot_model, pred = bs, modx = Freq, modx.values = c(1:10), colors = q10, legend.main = "# of Enhancers") + theme_minimal(base_size = 18) +
xlab("Burst Size") +
ylab("Total Noise") +
scale_x_log10()
## Using data kinetics_noise from global environment. This could cause
## incorrect results if kinetics_noise has been altered since the model was
## fit. You can manually provide the data to the "data =" argument.